In [1]:
    
from scipy.interpolate import interp1d
    
In [2]:
    
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
    
In [3]:
    
def estimate_mean(data, weight):
    return np.sum(data * weight) / np.sum(weight)
def estimate_std(data, weight, mean):
    variance = np.sum(weight * (data - mean)**2) / np.sum(weight)
    return np.sqrt(variance)
    
In [4]:
    
np.random.seed(110) # for reproducible random results
# set parameters
red_mean = 3
red_std = 0.8
blue_mean = 7
blue_std = 2
    
In [5]:
    
# draw 20 samples from normal distributions with red/blue parameters
red = np.random.normal(red_mean, red_std, size=20)
blue = np.random.normal(blue_mean, blue_std, size=20)
both_colours = np.sort(np.concatenate((red, blue))) # for later use...
    
In [6]:
    
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.show()
    
    
In [7]:
    
# estimates for the mean
red_mean_guess = 1.1
blue_mean_guess = 9
# estimates for the standard deviation
red_std_guess = 2
blue_std_guess = 1.7
    
In [8]:
    
likelihood_of_red = stats.norm(red_mean_guess, red_std_guess).pdf(both_colours)
likelihood_of_blue = stats.norm(blue_mean_guess, blue_std_guess).pdf(both_colours)
    
In [9]:
    
f1 = interp1d(both_colours, likelihood_of_red,  kind='cubic')
f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.plot(xnew, f1(xnew), c='#FFCCCC')
plt.plot(xnew, f2(xnew), c='#CCCCFF')
plt.show()
    
    
In [10]:
    
likelihood_total = likelihood_of_red + likelihood_of_blue
red_weight = likelihood_of_red / likelihood_total
blue_weight = likelihood_of_blue / likelihood_total
    
In [11]:
    
# new estimates for standard deviation
blue_std_guess = estimate_std(both_colours, blue_weight, blue_mean_guess)
red_std_guess = estimate_std(both_colours, red_weight, red_mean_guess)
# new estimates for mean
red_mean_guess = estimate_mean(both_colours, red_weight)
blue_mean_guess = estimate_mean(both_colours, blue_weight)
print blue_std_guess, blue_mean_guess
print red_std_guess, red_mean_guess
    
    
In [12]:
    
# estimates for the mean
red_mean_guess = 1.1
blue_mean_guess = 9
# estimates for the standard deviation
red_std_guess = 2
blue_std_guess = 1.7
for i in range(20):
    likelihood_of_red = stats.norm(red_mean_guess, red_std_guess).pdf(both_colours)
    likelihood_of_blue = stats.norm(blue_mean_guess, blue_std_guess).pdf(both_colours)
    f1 = interp1d(both_colours, likelihood_of_red,  kind='cubic')
    f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
    xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
    plt.scatter(red, np.zeros(len(red)), c='red')
    plt.scatter(blue, np.zeros(len(blue)), c='blue')
    plt.plot(xnew, f1(xnew), c='#FFCCCC')
    plt.plot(xnew, f2(xnew), c='#CCCCFF')
    likelihood_total = likelihood_of_red + likelihood_of_blue
    red_weight = likelihood_of_red / likelihood_total
    blue_weight = likelihood_of_blue / likelihood_total
    # new estimates for standard deviation
    blue_std_guess = estimate_std(both_colours, blue_weight, blue_mean_guess)
    red_std_guess = estimate_std(both_colours, red_weight, red_mean_guess)
    # new estimates for mean
    red_mean_guess = estimate_mean(both_colours, red_weight)
    blue_mean_guess = estimate_mean(both_colours, blue_weight)
f1 = interp1d(both_colours, likelihood_of_red,  kind='cubic')
f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.plot(xnew, f1(xnew), c='#FF0000')
plt.plot(xnew, f2(xnew), c='#0000FF')
    
plt.show()